import os
import cv2
import numpy as np
import plotly.graph_objects as go
from matplotlib import pyplot as plt
try:
from PIL import Image
except ImportError:
import Image
import pytesseract
np.seterr(divide='ignore', invalid='ignore')
# Tesseract-OCR executable
pytesseract.pytesseract.tesseract_cmd = r'C:\Program Files\Tesseract-OCR\tesseract.exe'
# Image samples
SAMPLE_1 = os.path.join('images', 'sample01.png')
SAMPLE_1_GROUND_TRUTH = os.path.join('out', 'correct_1.txt')
SAMPLE_2 = 'images/sample02.png'
SAMPLE_2_GROUND_TRUTH = os.path.join('out', 'correct_2.txt')
DPI = (300, 300)
def indent(string, num_spaces, addTripleQuotes=True):
if addTripleQuotes:
string = "'''\n" + string + "\n'''\n"
return '\n'.join([((' ' * num_spaces) + line) for line in string.split("\n")])
def imshow1(img, title, figsize=(14,14)):
plt.figure(figsize=figsize)
plt.imshow(img, cmap = 'gray')
plt.title(title);
def imshow2(img, title, figsize=(10,10)):
plt.figure(figsize=figsize)
plt.imshow(img, cmap = 'gray')
plt.title(title);
def generate_image_dpi(path, dest, dpi=DPI):
os.makedirs(os.path.dirname(dest), exist_ok=True)
img = Image.open(path)
img.save(dest, dpi=dpi)
return Image.open(dest)
def intensity_hotspot(img, i_low, i_high, figsize=(12,12)):
img = np.array(img)
thres = np.ones(img.shape).astype(np.uint8)
thres[np.where(img >= i_low) and np.where(img <= i_high)] = 0
plt.figure(figsize=figsize)
plt.imshow(thres, cmap = 'gray')
return thres
from collections import Counter
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.metrics.pairwise import cosine_similarity
def get_cosine_sim(query, document):
text = [query, document]
count_vec = CountVectorizer()
count_vec.fit(text)
arr = count_vec.transform(text).toarray()
vector1, vector2 = arr
return cosine_similarity(np.array([vector1]), np.array([vector2]))[0][0]
def get_jaccard_sim(query, document):
query = query.split()
document = document.split()
intersection = set(query).intersection(set(document))
union = set(query).union(set(document))
print(f"Jaccard errors:\n{sorted(union - intersection)}\n")
if len(union) == 0:
print('No similar words')
return 0
return len(intersection)/len(union)
def get_levenshtein_sim(query, document):
size_x = len(query) + 1
size_y = len(document) + 1
matrix = np.zeros((size_x, size_y), dtype=int)
for x in range(size_x):
matrix [x, 0] = x
for y in range(size_y):
matrix [0, y] = y
for x in range(1, size_x):
for y in range(1, size_y):
if query[x-1] == document[y-1]:
matrix [x,y] = min(
matrix[x-1, y] + 1,
matrix[x-1, y-1],
matrix[x, y-1] + 1
)
else:
matrix [x,y] = min(
matrix[x-1, y] + 1,
matrix[x-1, y-1] + 1,
matrix[x, y-1] + 1
)
# print (matrix)
distance = matrix[size_x - 1, size_y - 1]
return (size_x + size_y - distance) / (size_x + size_y)
import string
whitelist = string.ascii_lowercase + string.ascii_uppercase + ":-.,!\\\"\\\' "
print(f"Whitelist: {whitelist}")
Whitelist: abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ:-.,!\"\'
# custom_config = r'-c tessedit_char_blacklist=\©_|[] textord_space_size_is_variable=1 preserve_interword_spaces=1 --psm 3 --oem 3'
custom_config = f'-c tessedit_char_whitelist="{whitelist}" textord_space_size_is_variable=1 preserve_interword_spaces=1 --psm 3 --oem 0'
def get_metric_score(img, ground_truth_pth):
img_str = pytesseract.image_to_string(img, config=custom_config).strip()
print(f"{'=' * 18} Results {'=' * 18}")
print(f"\n{indent(img_str, 3)}")
with open(ground_truth_pth, 'r', encoding="utf8") as f:
ground_truth = f.read().strip()
jaccard_sim = get_jaccard_sim(img_str, ground_truth)
cosine_sim = get_cosine_sim(img_str, ground_truth)
levenshtein_sim = get_levenshtein_sim(img_str, ground_truth)
print(f"-> Jaccard Similarity: \t{jaccard_sim:.5f}")
print(f"-> Cosine Similarity: \t{cosine_sim:.5f}")
print(f"-> Levenshtein Similarity:\t{levenshtein_sim:.5f}")
avg_score = (jaccard_sim + cosine_sim + levenshtein_sim) / 3
print(f"\n-> Average Score: {avg_score:.5f}")
return avg_score, [jaccard_sim, cosine_sim, levenshtein_sim]
def rotate_image(img, angle):
if angle == 0:
return img
image_center = tuple(np.array(img.shape[1::-1]) / 2)
rot_matrix = cv2.getRotationMatrix2D(image_center, angle, 1.0)
rotated = cv2.warpAffine(img, rot_matrix, img.shape[1::-1],
flags=cv2.INTER_LINEAR, borderMode=cv2.BORDER_REPLICATE)
return rotated
def invert_thres(binary_img):
return (~(binary_img*255) / 255).astype('uint8')
def contrast_stretching(img):
img = np.array(img)
min_ = img.min()
max_ = img.max()
img = np.subtract(img, min_)
img = np.multiply(img, 255 / (max_ - min_))
return img.astype(np.uint8)
def thresholding(img, low):
assert low < 256 and low > -1
# img = img.convert('L')
img = np.array(img)
thres = np.zeros(img.shape).astype(np.uint8)
thres[np.where(img > low)] = 1
return thres
def otsu_thresholding(img):
# img = np.array(img)
matrix = np.product(img.shape)
min_var = []
hist, bin_edges = np.histogram(img, bins=256, range=(0,256))
for temp_intensity in np.arange(1, 255, 1):
thres = thresholding(img, temp_intensity)
# Computing Weights
w_a = np.sum(hist[:temp_intensity]) / float(matrix)
w_b = np.sum(hist[temp_intensity:]) / float(matrix)
# Computing Variances
sigma_a = np.var(img[np.where(thres == 0)])
sigma_b = np.var(img[np.where(thres == 1)])
min_var.append(w_a * sigma_a + w_b * sigma_b)
otsu_thres = np.nanargmin(min_var)
otsu_img = thresholding(img, otsu_thres)
return otsu_img, otsu_thres
def get_skew_angle(img):
otsu, otsu_thres = otsu_thresholding(img)
invert_otsu = invert_thres(otsu).astype('uint8')
coords = np.column_stack(np.where(invert_otsu > 0))
angle = cv2.minAreaRect(coords)[-1]
if angle < -45:
return -(90 + angle)
else:
return -angle
# Original image results
get_metric_score(SAMPLE_1, SAMPLE_1_GROUND_TRUTH);
================== Results ================== ''' Parking: You may park anywhere on the ce king. Keep in mind the carpool hours and park afternoon Under School Age Children:While we love inappropriate to have them on campus d that they may be invited or can accompany : you adhere to our policy for the benefit of ''' Jaccard errors: [':', 'There', 'a', 'accordingly', 'are', 'ask', 'blocked', 'but', 'ce', 'children,', 'd', 'disruptive', 'do', 'during', 'get', 'hours.', 'it', 'no', 'not', 'otherwise', 'par-', 'parent', 'prohibiting', 'school', 'signs', 'so', 'special', 'staff.', 'students', 'there', 'times', 'volunteer,', 'where', 'younger'] -> Jaccard Similarity: 0.54054 -> Cosine Similarity: 0.84615 -> Levenshtein Similarity: 0.67647 -> Average Score: 0.68772
img = generate_image_dpi(SAMPLE_1, os.path.join("processed", f"sample01_dpi_{DPI[0]}.png"), dpi=DPI)
print(f"Format: \t{img.format}\nSize: \t\t{img.size}\nChannels: \t{img.mode}")
fig1 = go.Figure()
img = img.convert('L')
img = np.array(img)
img = cv2.resize(img, None, fx=1.4, fy=1.4, interpolation=cv2.INTER_CUBIC)
imshow1(img, 'Step 0: Original (Upscaled by 1.4x)')
fig1.add_trace(go.Histogram(x=img.ravel(), nbinsx=256, name="Step 0: Original"))
# Remove shadow
median_blur_max = cv2.medianBlur(img, 301)
imshow1(median_blur_max, f'Step 1: Get Shadow - Maximum Median Blur')
fig1.add_trace(go.Histogram(x=median_blur_max.ravel(), nbinsx=256, name="Step 1: Get shadow"))
shadow_removed = img / median_blur_max
new_img = contrast_stretching(shadow_removed)
img = new_img.astype('uint8')
imshow1(img, f'Step 2: Remove Shadow')
fig1.add_trace(go.Histogram(x=img.ravel(), nbinsx=256, name="Step 2: Remove Shadow"))
# Rotate image
angle = get_skew_angle(img)
if (angle != 0):
print(f"Angle to rotate: {angle}")
img = rotate_image(img, angle)
imshow1(img, f"Step 3: Rotate by '{angle:.5f}' degrees")
fig1.add_trace(go.Histogram(x=img.ravel(), nbinsx=256, name="Step 3: Rotate Image"))
else:
print(f"Skew angle is 0, rotation unneccessary.")
im = Image.fromarray(img)
im.save(os.path.join("processed", f"sample01_processed.png"), dpi=DPI)
Format: PNG Size: (965, 229) Channels: RGBA Angle to rotate: -1.29095458984375
# Histogram Plots for the above processed images
# Commented to keep within Github 100mb space limits.
# Uncomment to see interactive histogram plot. Alternatively, refer to 'OCR_Project.html'
fig1.update_layout(
title="'sample01.png' Histogram",
xaxis_title="Pixel Intensity",
yaxis_title="Count",
legend_title="Steps",
autosize=True,
width=975,
height=550
)
fig1.show()